suppressPackageStartupMessages(library(tidyverse))
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities
# Settings ----
# Make theme_classic (base font size 7) with the legend placed at the bottom
# the default ggplot2 theme for every plot created after this point.
(theme_classic(base_size = 7) + theme(legend.position = 'bottom')) |>
  theme_set()
# Root directory of the analysis. The trailing slash matters: paste_wd()
# concatenates strings without inserting a path separator.
wd <- '~/Google Drive/My Drive/Analysis/METTL2A/'

# Prefix `path` (a character vector of paths relative to `wd`) with `wd`.
# Returns a character vector of the same length as `path`.
paste_wd <- function(path) paste0(wd, path)

# Directories for the DRS poly(A) figures and tables, both under `wd`.
figdir <- paste_wd('Figures/DRS/PolyA/')
tabledir <- paste_wd('Tables/DRS/PolyA/')
# Read data ----
# Read the gzipped TSV of chrM poly(A) length data from the analysis directory.
# No col_types are supplied, so readr guesses the column types and prints the
# column specification message.
chrM_polyA_length_data <- read_tsv(
  paste_wd('Tables/DRS/PolyA/chrM_polyA_length_data_2025-07-11.tsv.gz')
)
## Rows: 2072277 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (12): readname, transcript_id, qc_tag, sample_num, type, rep, transcript...
## dbl (8): position, leader_start, adapter_start, polya_start, transcript_sta...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble to preview its dimensions, first rows and column types.
chrM_polyA_length_data
## # A tibble: 2,072,277 × 20
## readname transcript_id position leader_start adapter_start polya_start
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 e2291977-a67e-… ENST00000389… 0 2 3 9363
## 2 3da7d287-48ee-… ENST00000389… 0 2 3 12065
## 3 daa33f45-6c8e-… ENST00000389… 0 2 3 12664
## 4 8d12dc33-6bb7-… ENST00000389… 0 2 3 7793
## 5 ed309b03-0167-… ENST00000387… 0 -1 -1 -1
## 6 498c7f2b-44b3-… ENST00000387… 0 540 2462 5598
## 7 f0e4f4ca-c04c-… ENST00000389… 0 2 3 7919
## 8 5a38a52b-d271-… ENST00000387… 0 -1 -1 -1
## 9 7e677234-de8e-… ENST00000389… 0 265 611 6969
## 10 25aba1c2-5e65-… ENST00000389… 0 2 3 5990
## # ℹ 2,072,267 more rows
## # ℹ 14 more variables: transcript_start <dbl>, read_rate <dbl>,
## # polya_length <dbl>, qc_tag <chr>, sample_num <chr>, type <chr>, si <dbl>,
## # rep <chr>, transcript_type <chr>, transcript_name <chr>, gene_id <chr>,
## # gene_type <chr>, gene_name <chr>, seqname <chr>
# Show the distinct values found in the transcript_type column.
unique(chrM_polyA_length_data[['transcript_type']])
## [1] "Mt_rRNA" "Mt_tRNA" "protein_coding" NA
# Boxplot ----
# Horizontal boxplot of poly(A) length per transcript, with transcripts ordered
# by their median poly(A) length and boxes filled by transcript type.
chrM_polyA_length_data_boxplot <-
  chrM_polyA_length_data |>
  # Keep rows with a positive poly(A) length (filter() also drops rows where
  # the comparison is NA) that belong to Mt_rRNA or protein_coding transcripts.
  filter(
    polya_length > 0,
    transcript_type %in% c('Mt_rRNA', 'protein_coding')
  ) |>
  ggplot(aes(
    # Transcripts are drawn on x and flipped to the vertical axis below.
    x = reorder(transcript_name, polya_length, FUN = median),
    y = polya_length,
    # color = transcript_type,
    fill = transcript_type
  )) +
  # geom_violin() +
  # Do not draw outlier points.
  geom_boxplot(outliers = FALSE) +
  # Semi-transparent reference line at zero length.
  geom_hline(yintercept = 0, alpha = 1/2) +
  # Colours are named so each transcript type keeps its colour even if one
  # type is absent from the data; an unnamed vector is matched by level order.
  scale_fill_manual(
    values = c(Mt_rRNA = '#DE555A', protein_coding = '#DE559F')
  ) +
  scale_y_continuous(breaks = seq(0, 120, 20)) +
  labs(x = '', y = 'poly(A) length (nt)') +
  coord_flip()
# Save the boxplot into figdir with ggsave_pdf().
# NOTE(review): ggsave_pdf is not defined in this file (presumably provided by
# myUtilities); the output file name and the width/height units are decided by
# that helper - confirm there.
ggsave_pdf(
  chrM_polyA_length_data_boxplot,
  outdir = figdir,
  width = 6,
  height = 7
)
